So far I have only worked with the Farnebäck and the TV-L1 approach. The opencv package, however, offers a few other interesting options. So let us now compare the following approaches (matching the flow_methods dictionary defined below):

- Simple Flow
- Farnebäck flow
- Dual TV-L1 flow
- DIS flow (fast, medium and ultrafast presets)
- cross-correlation tracking

Let's see whether the optimised variants really perform better than the unoptimised ones. As our comparison metric we use the mean absolute error:
$$\mathrm{MAE} = \frac{1}{n} \sum_{i=1}^{n} \left| \mathrm{reference}_i - \mathrm{observation}_i \right|$$
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import cv2
import xarray as xr
from analysis_tools import optical_flow as oflow
from analysis_tools import grid_and_interpolation as gi
from standard_config import *
import sys
sys.path.append("{}/utils/tracking".format(local_home_path))
sys.path.append("{}/utils".format(local_home_path))
import load_satellite_data as lsd
sys.path.append("/vols/satellit/home/lenk/utils/tracking")
import tracking_common as tco
import optical_flow_tracking as oft
import cross_correlation_tracking as cct
from skimage.feature import register_translation
from skimage import color  # needed by add_hrv_texture2nc below
import MSGtools as mst
from l15_msevi import msevi_config
import geooperations as go
import fixed_colourbar as fc
from plotting_tools.colormaps import enhanced_colormap2
emap = enhanced_colormap2()
To have a few comparison cases, we load the HACI tracks.
haci_objects = pd.read_csv("{}/HACI_bbox_data/haci-2008-2017-bbox-filtered.csv".format(local_data_path))
haci_objects.head()
dates = [pd.Timestamp(d).strftime("%Y%m%d") for d in haci_objects.date]
dd = [pd.Timestamp(d) for d in dates]
haci_objects = haci_objects.assign(date_str=dates)
haci_objects = haci_objects.assign(date = dd)
haci_objects_2013 = haci_objects[(haci_objects.date >= pd.to_datetime("2013-01-01T0000")) &
(haci_objects.date <= pd.to_datetime("2013-12-31T2359"))]
slon,slat = mst.get_msg_lon_lat('eu')
hlon = gi.make_hrv_upscaling(slon)
hlat = gi.make_hrv_upscaling(slat)
We load the HACI coordinates for an example case, determine the corresponding time steps, and create cutouts of the satellite data.
case = haci_objects_2013[(haci_objects_2013.date=="2013-06-18") & (haci_objects_2013.id==2687)]
case
case.date.loc[5952]
case_data_path = "{}/HACI_track_data".format(local_data_path)
with xr.open_dataset("{}/track_{}_{}.nc".format(case_data_path,case.date.loc[5952].strftime("%Y%m%d"),case.id.loc[5952])) as file:
case_data = file.copy()
case_data
tlist = pd.date_range(start = case.date.loc[5952] - pd.Timedelta("30min"),
                      end = case.date.loc[5952] + pd.Timedelta("30min"),
                      freq = "5min")
# satellite_fields = msevi_config._narrow_channels + ['HRV','CT','CTTH_HEIGHT','CMa']
# sat_data = {ch:[] for ch in ['IR_108','CTTH_HEIGHT']}
# for i,t in enumerate(tlist):
# sat_obs = lsd.load_satellite_data(t,'IR_108')
# sat_data['CTTH_HEIGHT'].append(mst.get_nwcsaf_prod('CTTH_HEIGHT',t,calibrate=True))
# px_corr = go.parallax_correct_msg(sat_obs,slon,slat,sat_data['CTTH_HEIGHT'][i],'std','eu','rss',3)
# sat_data['IR_108'].append(px_corr)
# def get_radar_box_from_sat(sat_row, sat_col, sat_lat, sat_lon, box_size, radar_lat, radar_lon):
# row_min = sat_row - box_size // 2
# row_max = sat_row + box_size // 2 + 1
# col_min = sat_col - box_size // 2
# col_max = sat_col + box_size // 2 + 1
# lon_min = sat_lon[row_min, col_min]
# lon_max = sat_lon[row_max, col_max]
# lat_max = sat_lat[row_min, col_min]
# lat_min = sat_lat[row_max, col_max]
# r_idx = get_index_kdtree(np.array([[lat_min,lat_max], [lon_min,lon_max]]),radar_lat, radar_lon)
# return r_idx
def cutout_box(data, row, col, box_size):
    # cut a box_size x box_size window around (row, col); this helper is
    # needed again for the outlier cases further below, so it stays live
    cutout = gi.cutout_field4box(data,(row,col),box_size)
    return cutout
# def cutout_radar_from_sat(data, row, col, box_size, sat_lat, sat_lon, radar_lat, radar_lon):
# r_idx = get_radar_box_from_sat(row, col, sat_lat, sat_lon, box_size, radar_lat, radar_lon)
# data_cutout = data[r_idx[0][0]:r_idx[1][0],r_idx[0][1]:r_idx[1][1]]
# lon_cutout = radar_lon[r_idx[0][0]:r_idx[1][0],r_idx[0][1]:r_idx[1][1]]
# lat_cutout = radar_lat[r_idx[0][0]:r_idx[1][0],r_idx[0][1]:r_idx[1][1]]
# return data_cutout, lon_cutout, lat_cutout
def normalise2range(data):
return (data - np.min(data)) / (np.max(data) - np.min(data))
def array_to_256(array):
return (array*255.999).astype("uint8")
def day_natural_composite(vis006_data,vis008_data,nir016_data,factor=1,gamma=1):
blue = array_to_256(np.clip(vis006_data/factor,0,1)**(1./gamma))
green = array_to_256(np.clip(vis008_data/factor,0,1)**(1./gamma))
red = array_to_256(np.clip(nir016_data/factor,0,1)**(1./gamma))
return np.dstack([red,green,blue]).astype("uint8")
def scale_array_min_max(array_data,range_min=0,range_max=1):
"""
Scales a array into the chosen range.
Inputs:
-------
array_data: numpy array of floats or integers, 2d
array to scale
range_min: int or float, default = 0
minimum value of the range to scale array to,
range_max: int or float, default = 1
maximum value of the range to scale array to,
Returns:
--------
scaled_array: numpy array of floats, 2d
"""
# get array extrema
array_min = np.min(array_data)
array_max = np.max(array_data)
# derive conversion parameters
a = (range_max - range_min) / (array_max - array_min)
b = range_max - a * array_max
# scale array
scaled_array = a * array_data + b
return scaled_array
def add_hrv_texture2nc(nc,hrv):
nc_lab = color.rgb2lab(nc)
l_min = nc_lab[...,0].min()
l_max = nc_lab[...,0].max()
l_hrv_scaled = nc_lab[...,0] * hrv
l_hrv_scaled = scale_array_min_max(l_hrv_scaled,l_min,l_max)
nc_lab[...,0] = l_hrv_scaled
return color.lab2rgb(nc_lab)
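add_hrv_texture2nc blends the high-resolution HRV signal into the lightness channel of the LAB-converted composite. A hypothetical usage sketch (not executed here; it assumes the composite and the HRV field have matching spatial shapes and that the HRV field is scaled to roughly [0, 1]):
# hypothetical usage of the helpers above:
# nc = day_natural_composite(vis006_field, vis008_field, nir016_field, factor=0.9, gamma=1.8)
# hrv_norm = normalise2range(hrv_field)  # scale HRV to [0, 1]
# textured = add_hrv_texture2nc(nc, hrv_norm)
# plt.imshow(textured)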
tracking_base = case_data.ir108.data
fig,ax = plt.subplots(2,8,figsize=(32,8),sharex=True,sharey=True)
axs=ax.ravel()
for i, tb in enumerate(tracking_base):
#p = axs[i].pcolormesh(case_data.slon.data,case_data.slat.data,tb,vmin=210,vmax=300,cmap=emap)
p = axs[i].imshow(tb,vmin=210,vmax=300,cmap=emap)
axs[i].set_title("t = {} min".format(i*5 - 30))
axs[i].axis("off")
fig.delaxes(ax[1,5])
fig.delaxes(ax[1,6])
fig.delaxes(ax[1,7])
cbar = fig.colorbar(p, ax=ax.flat,label='BT(10.8 µm) / K')
# Make common axis labels
# fig.text(0.45, 0.02,u'longitude / °E', va='center', ha='center')
# fig.text(0.1, 0.5, u'latitude / °N', va='center', ha='center', rotation='vertical')
plt.savefig("/vols/satellite/home/lenk/example_case_ir108.png",bbox_inches='tight')
fig,ax = plt.subplots(2,8,figsize=(32,8),sharex=True,sharey=True)
axs=ax.ravel()
for i, tb in enumerate(tracking_base):
nc = day_natural_composite(case_data.vis006.data[i],case_data.vis008.data[i],case_data.ir016.data[i],0.9,1.8)
p = axs[i].imshow(nc)
axs[i].set_title("t = {} min".format(i*5 - 30))
axs[i].axis('off')
fig.delaxes(ax[1,5])
fig.delaxes(ax[1,6])
fig.delaxes(ax[1,7])
#cbar = fig.colorbar(p, ax=ax.flat)
#cbar.ax.set_title(u'BT(10.8 µm)')
# Make common axis labels
#fig.text(0.45, 0.02,u'longitude / °E', va='center', ha='center')
#fig.text(0.01, 0.5, u'latitutde / °N', va='center', ha='center', rotation='vertical')
plt.savefig("/vols/satellite/home/lenk/example_case_nc.png")
def MAE(now,obs):
mae = np.sum(np.abs(now-obs)) / (now.shape[0]*now.shape[1])
return mae
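A quick sanity check of the MAE implementation on two hand-made toy arrays:
a = np.array([[1.0, 2.0], [3.0, 4.0]])
b = np.array([[2.0, 2.0], [1.0, 4.0]])
print(MAE(b, a))  # (1 + 0 + 2 + 0) / 4 = 0.75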
params = dict()
params['box_size'] = 5
params['upsample_factor'] = 1
flow_methods = {'simple':{'function':'cv2.optflow.createOptFlow_SimpleFlow()','label':'Simple Flow'},
                'farnebaeck':{'function':'cv2.optflow.createOptFlow_Farneback()', 'label':'Farnebäck flow'},
                #'farnebaeck_opt':{'function':'oft.calculate_optical_flow_farnebaeck(f0,f1)',
                #                  'label':'Farnebäck flow, optimised'},
                'tvl1':{'function':'cv2.optflow.createOptFlow_DualTVL1()','label':'Dual TV-L1 flow'},
                'dis_fast':{'function':'cv2.DISOpticalFlow_create(cv2.DISOpticalFlow_PRESET_FAST)',
                            'label':'DIS flow fast'},
                'dis_medium':{'function':'cv2.DISOpticalFlow_create(cv2.DISOpticalFlow_PRESET_MEDIUM)',
                              'label':'DIS flow medium'},
                'dis_superfast':{'function':'cv2.DISOpticalFlow_create(cv2.DISOpticalFlow_PRESET_ULTRAFAST)',
                                 'label':'DIS flow ultrafast'},
                #'xcorr':{'function':'cct.calc_cross_correlation_shift(f0,f1,cc_parameters=params)',
                #         'label': 'Cross correlation tracking'}}
                'xcorr': {'function':'register_translation(f0,f1)','label':'Cross correlation'}}
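The dictionary stores the constructor calls as strings, which are dispatched via eval further below. For clarity, a minimal sketch of what that dispatch does for a single method (DIS with the medium preset; f0 and f1 are the uint8 images created a few cells further down):
# minimal sketch of the eval dispatch for one method:
# dis = cv2.DISOpticalFlow_create(cv2.DISOpticalFlow_PRESET_MEDIUM)
# flow = dis.calc(f0, f1, None)  # flow field of shape (rows, cols, 2): (u, v) per pixel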
fig,ax = plt.subplots(1,3,figsize=(15,5), sharex=True, sharey=True)
p0 = ax[0].imshow(tracking_base[5],vmin=210,vmax=300,cmap=emap)
fc.colourbar(p0)
ax[0].set_title("BT(10.8 µm), t0")
ax[0].axis("off")
p1 = ax[1].imshow(tracking_base[6],vmin=210,vmax=300,cmap=emap)
fc.colourbar(p1)
ax[1].set_title("BT(10.8 µm), t1")
ax[1].axis("off")
diff_plot = ax[2].imshow(tracking_base[6]-tracking_base[5],cmap='RdBu_r',vmin=-20,vmax=20)
fc.colourbar(diff_plot)
ax[2].set_title("(t1 - t0)/ K")
ax[2].axis("off")
plt.savefig("/vols/satellite/home/lenk/bt_difference.png",bbox_inches='tight')
In the next step, we compute the flow fields for the chosen methods.
flow_res = {k:[] for k in list(flow_methods.keys())}
flow_res
f0 = tco.transform_array2picture(np.max(tracking_base[5])-tracking_base[5])
f1 = tco.transform_array2picture(np.max(tracking_base[6])-tracking_base[6])
for fm in list(flow_methods.keys()):
if fm in ['farnebaeck_opt','tvl1_opt']:
res = eval(flow_methods[fm]['function'])
    elif fm in ['xcorr']:
        res = eval(flow_methods[fm]['function'])
        # register_translation returns (shift, error, diffphase); expand the single
        # (row, col) shift into a constant (u, v) flow field over the 51 x 51 cutout
        res_tuples = []
        for i in range(0,51*51):
            res_tuples.append([-res[0][1],-res[0][0]])
        res = np.array(res_tuples).reshape((51,51,2))
else:
flow = eval(flow_methods[fm]['function'])
res = flow.calc(f0,f1,None)
flow_res[fm] = res
With the flow fields we can shift the images of the first time step and compute the errors.
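oflow.morph_trans_opt_flow is an in-house helper from analysis_tools. As a rough illustration of what flow-based warping does, a minimal remap-based sketch could look like the following (the sign and direction conventions here are assumptions and may differ from the actual helper):
def warp_with_flow(img, flow):
    # sketch: warp img with a dense (u, v) flow field via bilinear remapping
    h, w = img.shape[:2]
    grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
    map_x = (grid_x + flow[..., 0]).astype(np.float32)
    map_y = (grid_y + flow[..., 1]).astype(np.float32)
    return cv2.remap(img.astype(np.float32), map_x, map_y,
                     interpolation=cv2.INTER_LINEAR)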
shifted_img = {fm:[] for fm in list(flow_methods.keys())}
mae = {fm:[] for fm in list(flow_methods.keys())}
for fm in list(flow_res.keys()):
shifted_img[fm] = oflow.morph_trans_opt_flow(tracking_base[5],flow_res[fm])
mae[fm] = MAE(tracking_base[6],shifted_img[fm])
For comparison, we also determine the MAE for the Eulerian perspective (i.e. simple persistence, without any shifting).
shifted_img['euler'] = tracking_base[6]
mae['euler'] = MAE(tracking_base[5],tracking_base[6])
shifted_img['xcorr']
mae
As expected, the Eulerian perspective performs relatively poorly with an MAE of 2.85, and the Lagrangian approaches have lower MAE values, except for the cross-correlation tracking.
Among the optical-flow methods, the lowest values come from the DIS variants fast and ultrafast (both 1.66), closely followed by the Farnebäck method (1.67), then the TV-L1 method (1.68), the DIS variant medium (1.77), and simple flow (2.48). The variants whose parameters were optimised on the track cutouts perform worse than the default parameters.
Cross-correlation tracking has a larger error than the Eulerian reference, with an MAE of 2.98. It is thus the only method that brings no gain over not tracking at all.
fig,ax = plt.subplots(1,3,figsize=(15,5), sharex=True, sharey=True)
p0 = ax[0].imshow(tracking_base[5],vmin=210,vmax=300,cmap=emap)
fc.colourbar(p0)
ax[0].set_title("BT(10.8 µm), t0")
ax[0].axis("off")
p1 = ax[1].imshow(tracking_base[6],vmin=210,vmax=300,cmap=emap)
fc.colourbar(p1)
ax[1].set_title("BT(10.8 µm), t1")
ax[1].axis("off")
diff_plot = ax[2].imshow(tracking_base[6]-tracking_base[5],cmap='RdBu_r',vmin=-20,vmax=20)
fc.colourbar(diff_plot)
ax[2].set_title("(t1 - t0)/ K")
ax[2].axis("off")
fig.text(0.74,0.07,'MAE = {:1.2f}'.format(mae['euler']),fontsize=18)
plt.savefig("/vols/satellite/home/lenk/bt_difference_mae.png",bbox_inches='tight')
fig,ax = plt.subplots(2,7,figsize=(28,8),sharex=True,sharey=True)
for i, fm in enumerate(flow_res.keys()):
h_plot = ax[0,i].imshow(flow_res[fm][:,:,0],vmin=-1,vmax=1,cmap='RdBu_r')
ax[0,i].set_title("{}".format(flow_methods[fm]['label']))
#fc.colourbar(h_plot)
p = v_plot = ax[1,i].imshow(flow_res[fm][:,:,1],vmin=-1,vmax=1,cmap='RdBu_r')
#ax[1,i].set_title("{}\n vertical flow".format(flow_methods[fm]['label']))
#fc.colourbar(v_plot)
cbar = fig.colorbar(p, ax=ax.flat,label='speed / (px / 5 min)')
# Make common axis labels
fig.text(0.1, 0.72,u'horizontal flow', va='center', ha='center', rotation='vertical',fontsize=16)
fig.text(0.1, 0.3, u'vertical flow', va='center', ha='center', rotation='vertical',fontsize=16)
fig.text(0.135,0.05,'MAE = {:.2f}'.format(mae['simple']),fontsize=18)
fig.text(0.23,0.05,'MAE = {:.2f}'.format(mae['farnebaeck']),fontsize=18)
fig.text(0.32,0.05,'MAE = {:.2f}'.format(mae['tvl1']),fontsize=18)
fig.text(0.41,0.05,'MAE = {:.2f}'.format(mae['dis_fast']),fontsize=18)
fig.text(0.50,0.05,'MAE = {:.2f}'.format(mae['dis_medium']),fontsize=18)
fig.text(0.59,0.05,'MAE = {:.2f}'.format(mae['dis_superfast']),fontsize=18)
fig.text(0.68,0.05,'MAE = {:.2f}'.format(mae['xcorr']),fontsize=18)
plt.savefig("/vols/satellite/home/lenk/trackvergleich0.png", bbox_inches='tight')
fig,ax = plt.subplots(3,8,figsize=(40,15),sharex=True,sharey=True)
for i, fm in enumerate(shifted_img.keys()):
ax[0,i].imshow(tracking_base[5],vmin=210,vmax=300,cmap=emap)
if fm == 'euler':
ax[0,i].imshow(tracking_base[6],vmin=210,vmax=300,cmap=emap)
ax[0,i].set_title("Euler reference")
ax[1,i].imshow(tracking_base[5],vmin=210,vmax=300,cmap=emap)
ax[2,i].imshow(tracking_base[6]-tracking_base[5],vmin=-10,vmax=10,cmap='RdBu_r')
else:
ax[0,i].imshow(tracking_base[6],vmin=210,vmax=300,cmap=emap)
ax[0,i].set_title("{}".format(flow_methods[fm]['label']))
ax[1,i].imshow(shifted_img[fm],vmin=210,vmax=300,cmap=emap)
ax[2,i].imshow(tracking_base[6]-shifted_img[fm],vmin=-10,vmax=10,cmap='RdBu_r')
# Make common axis labels
fig.text(0.1, 0.76,u't1 image', va='center', ha='center', rotation='vertical',fontsize=18)
fig.text(0.1, 0.5, u't0 image shifted', va='center', ha='center', rotation='vertical',fontsize=18)
fig.text(0.1, 0.24, u'(t0 shifted - t1)', va='center', ha='center', rotation='vertical',fontsize=18)
fig.text(0.141,0.05,'MAE = {:1.2f}'.format(mae['simple']),fontsize=18)
fig.text(0.219,0.05,'MAE = {:1.2f}'.format(mae['farnebaeck']),fontsize=18)
fig.text(0.297,0.05,'MAE = {:1.2f}'.format(mae['tvl1']),fontsize=18)
fig.text(0.378,0.05,'MAE = {:1.2f}'.format(mae['dis_fast']),fontsize=18)
fig.text(0.456,0.05,'MAE = {:1.2f}'.format(mae['dis_medium']),fontsize=18)
fig.text(0.535,0.05,'MAE = {:1.2f}'.format(mae['dis_superfast']),fontsize=18)
fig.text(0.614,0.05,'MAE = {:1.2f}'.format(mae['xcorr']),fontsize=18)
fig.text(0.693,0.05,'MAE = {:1.2f}'.format(mae['euler']),fontsize=18)
cbar = fig.colorbar(p, ax=ax.flat,label='difference / K')
plt.savefig("/vols/satellite/home/lenk/track_mae_vergleich.png",bbox_inches='tight')
So far we have only looked at the first step. Perhaps the results look different for the other steps?
mae = {fm:[] for fm in list(flow_methods.keys())+['euler']}
time = []
for ti in range(1,len(tracking_base)):
f0 = tco.transform_array2picture(np.max(tracking_base[ti-1])-tracking_base[ti-1])
f1 = tco.transform_array2picture(np.max(tracking_base[ti])-tracking_base[ti])
flow_res = {k:[] for k in list(flow_methods.keys())}
for fm in list(flow_methods.keys()):
if fm in ['farnebaeck_opt','tvl1_opt']:
res = eval(flow_methods[fm]['function'])
elif fm in ['xcorr']:
res = eval(flow_methods[fm]['function'])
res_tuples = []
for i in range(0,51*51):
res_tuples.append([-res[0][1],-res[0][0]])
res = np.array(res_tuples).reshape((51,51,2))
else:
flow = eval(flow_methods[fm]['function'])
res = flow.calc(f0,f1,None)
flow_res[fm] = res
for fm in list(flow_res.keys()):
shifted_img = oflow.morph_trans_opt_flow(tracking_base[ti-1],flow_res[fm])
mae[fm].append(MAE(tracking_base[ti],shifted_img))
mae['euler'].append(MAE(tracking_base[ti],tracking_base[ti-1]))
time.append('[{},{}]'.format((ti-1)*5 -30 ,(ti*5)-30))
mae_df = pd.DataFrame(mae)
mae_df
len(time)
mre_df = mae_df.copy()
for c in mre_df.columns:
mre_df[c] = mre_df[c] / mre_df.euler
fig,ax = plt.subplots(2,1,figsize=(16,9),sharex=True)
mae_df.plot(ax=ax[0],legend=False)
ax[0].set_ylabel("MAE")
ax[0].set_title("MAE of the example track")
mre_df.plot(ax=ax[1])
ax[1].set_xticks(np.arange(0,12))
ax[1].set_xticklabels(time)
ax[1].set_xlabel("time of the shift relative to CI time / min")
ax[1].set_ylabel("MAE relative to Euler")
plt.legend(bbox_to_anchor=(1.01, 0.5), loc=3, borderaxespad=0.,title="tracking technique")
plt.savefig("/vols/satellite/home/lenk/mae_exampletrack.pdf",bbox_inches='tight')
mae_df.index.values
What held for the first time step also holds, broadly, for the whole track:
mre_df = mae_df.copy()
for c in mre_df.columns:
mre_df[c] = mre_df[c] / mre_df.euler
fig,ax = plt.subplots(1,1,figsize=(12,7.5))
mre_df.plot(ax=ax)
ax.set_xlabel("Zeitindex der Verschiebung")
ax.set_ylabel("MAE relativ zu Euler")
mre_df.mean()
Averaged over the track, the MAE is distributed as shown above.
Next, let us look at several tracks, to make sure this does not only hold for this one track.
We use all filtered HACI candidates for the year 2013 and then look at how the MAE is distributed.
import time, sys
from IPython.display import clear_output
import tqdm
def update_progress(progress):
bar_length = 20
if isinstance(progress, int):
progress = float(progress)
if not isinstance(progress, float):
progress = 0
if progress < 0:
progress = 0
if progress >= 1:
progress = 1
block = int(round(bar_length * progress))
clear_output(wait = True)
text = "Fortschritt: [{0}] {1:.1f}%".format( "#" * block + "-" * (bar_length - block), progress * 100)
print(text)
def calculate_track_mae(tracking_base_data,flow_method_names):
mae = {fm:[] for fm in flow_method_names + ['euler']}
time = []
    for ti in range(1,len(tracking_base_data)):
        f0 = tco.transform_array2picture(np.max(tracking_base_data[ti-1])-tracking_base_data[ti-1])
        f1 = tco.transform_array2picture(np.max(tracking_base_data[ti])-tracking_base_data[ti])
        flow_res = {k:[] for k in flow_method_names}
for fm in flow_method_names:
if fm in ['farnebaeck_opt','tvl1_opt']:
res = eval(flow_methods[fm]['function'])
elif fm in ['xcorr']:
res = eval(flow_methods[fm]['function'])
res_tuples = []
for i in range(0,51*51):
res_tuples.append([-res[0][1],-res[0][0]])
res = np.array(res_tuples).reshape((51,51,2))
else:
flow = eval(flow_methods[fm]['function'])
res = flow.calc(f0,f1,None)
flow_res[fm] = res
        for fm in list(flow_res.keys()):
            shifted_img = oflow.morph_trans_opt_flow(tracking_base_data[ti-1],flow_res[fm])
            mae[fm].append(MAE(tracking_base_data[ti],shifted_img))
        mae['euler'].append(MAE(tracking_base_data[ti],tracking_base_data[ti-1]))
time.append('{}_{}'.format(ti-1,ti))
mae_df = pd.DataFrame(mae,index=time)
return mae_df
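A quick usage sketch for a single track, reusing the example case loaded earlier (this assumes case_data and flow_methods are still in scope):
example_track_mae = calculate_track_mae(case_data.ir108.data, list(flow_methods.keys()))
example_track_mae.head()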
track_maes = []
time_idx = []
techniques = []
track_ids = []
for j,case in tqdm.tqdm(enumerate(haci_objects_2013.index),total=len(haci_objects_2013.index)):
case = haci_objects_2013.iloc[j]
case_id = "{}_{}".format(case.date.strftime("%Y%m%d"),case.id)
#print(case_id)
try:
with xr.open_dataset("{}/track_{}.nc".format(case_data_path,case_id) )as file:
case_data = file.copy()
# tlist = pd.date_range(start = case.date - pd.Timedelta("30min"),
# end = case.date + pd.Timedelta("30min"),
# freq = "5min")
# tt = [t.to_pydatetime() for t in tlist]
tracking_base = case_data.ir108.data
# for i,t in enumerate(tlist):
# sat_obs = lsd.load_satellite_data(t,'IR_108')
# data_cutout = cutout_box(sat_obs,case.l0_msg_eu, case.c0_msg_eu, 51)
# #sat_data['CTTH_HEIGHT'].append(mst.get_nwcsaf_prod('CTTH_HEIGHT',t,calibrate=True))
# #px_corr = go.parallax_correct_msg(sat_obs,slon,slat,sat_data['CTTH_HEIGHT'][i],'std','eu','rss',3)
# #sat_data['IR_108'].append(sat_obs)
# tracking_base.append(data_cutout)
#for sd in sat_data['IR_108']:
# data_cutout = cutout_box(sd,case.l0_msg_eu, case.c0_msg_eu, 51)
# tracking_base.append(data_cutout)
track_mae = calculate_track_mae(tracking_base,list(flow_methods.keys()))
for col in track_mae.columns:
track_maes.extend(track_mae[col].values)
techniques.extend([col]*len(track_mae.index))
time_idx.extend(track_mae.index.values)
track_ids.extend([case_id]*len(track_mae.index))
# track_maes.append(calculate_track_mae(tracking_base,list(flow_methods.keys())))
# track_ids.append(case_id)
except Exception as e:
print("Fehler {} bei Fall {}.".format(e,case_id))
continue
#update_progress(j / len(haci_objects_2013.index))
mae_df = pd.DataFrame({'MAE':track_maes,
'technique':techniques,
'track_id':track_ids,
'time_index':time_idx})
mae_df.to_csv("/vols/satellite/home/lenk/proj/2019-01_trackingstudie/data/track_mae_2013.csv",index=False,float_format="%.3f")
mae_df.head()
# map the raw time indices and technique keys to readable labels
time_labels = {'0_1':'[-30,-25]', '1_2':'[-25,-20]', '2_3':'[-20,-15]',
               '3_4':'[-15,-10]', '4_5':'[-10,-5]', '5_6':'[-5,0]',
               '6_7':'[0,5]', '7_8':'[5,10]', '8_9':'[10,15]',
               '9_10':'[15,20]', '10_11':'[20,25]', '11_12':'[25,30]'}
technique_labels = {'simple':'simple flow', 'farnebaeck':u'Farnebäck', 'tvl1':'TV-L1',
                    'dis_fast':'DIS fast', 'dis_medium':'DIS medium',
                    'dis_superfast':'DIS superfast', 'xcorr':'cross corr.', 'euler':'Euler'}
mae_df = mae_df.replace(time_labels)
mae_df = mae_df.replace(technique_labels)
mae_df.head()
import seaborn as sns
sns.set_context("talk")
fig,ax = plt.subplots(1,1,figsize=(16,10))
sns.boxplot(x='time_index',y='MAE',data=mae_df[mae_df.technique!='farnebaeck_opt'],hue='technique',ax=ax)
ax.set_xlabel("time relative to CI time / min")
# Put the legend out of the figure
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,title="tracking technique")
ax.set_title("Tracking error for all cases 2013")
plt.savefig("/vols/satellite/home/lenk/mae_2031.pdf",bbox_inches='tight')
mean_mae = mae_df.groupby(by=['technique']).mean()
mean_mae.head()
fig,ax = plt.subplots(1,1,figsize=(36,9))
sns.violinplot(x='technique',y='MAE',data=mae_df[mae_df.time_index=='[-30,-25]'],ax=ax)
fig,ax = plt.subplots(1,1,figsize=(36,9))
ax = sns.violinplot(x='technique',y='MAE',data=mae_df[mae_df.time_index=='[-25,-20]'])
fig,ax = plt.subplots(1,1,figsize=(16,9))
ax = sns.pointplot(x="time_index",
y="MAE",
data=mae_df,
hue='technique',
capsize=0.1,
ci=95,
markers=["o", "x","+","v","^","<","s","d"],
# linestyles=["-",
# "-.",
# ":",
# (0, (3, 5, 1, 5)),
# (0, (5, 10)),
# (0, (5, 5)),
# (0, (5,1)),
# (0, (3, 1, 1, 1))],
linewidth=0.8,
dodge=True)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,title="tracking technique")
ax.set_title("Tracking error for all cases 2013")
plt.savefig("/vols/satellite/home/lenk/mae_2013.pdf",bbox_inches='tight')
means = mae_df.groupby(by=['technique','time_index']).mean()
stds = mae_df.groupby(by=['technique','time_index']).std()
stds
fig,ax = plt.subplots(1,1,figsize=(16,9))
means.loc['DIS fast'].plot()
import datetime as dt
# note: the following analysis assumes track_maes holds one MAE DataFrame per track
# (the per-track variant commented out in the loop above), not the flat list of values
track_mres = []
for m in track_maes:
    mre_df = m.copy()
    for c in mre_df.columns:
        mre_df[c] = mre_df[c] / mre_df.euler
    track_mres.append(mre_df)
track_mae = []
for m in track_maes:
    mae_df = m.copy()
    for c in mae_df.columns:
        mae_df[c] = mae_df[c] / mae_df.euler
    track_mae.append(mae_df)
mean_mres = {k:[] for k in track_mres[0].columns}
for m in track_mres:
mm = m.mean()
for c in mm.index:
mean_mres[c].append(mm[c])
mean_mres_df = pd.DataFrame(mean_mres,index= track_ids)
mean_mres_df.head()
mean_mres_df.median()
mean_mres_df.to_csv("/vols/satellite/home/lenk/proj/2019-01_trackingstudie/data/mean_mre_tracking.csv")
mean_maes = {k:[] for k in track_mres[0].columns}
track_maes
for m in track_maes:
mm = m.mean()
for c in mm.index:
mean_maes[c].append(mm[c])
mean_maes_df = pd.DataFrame(mean_maes,index=track_ids)
mean_maes_df.head()
mean_maes_df.median()
fig,ax = plt.subplots(1,1,figsize=(16,7.5))
ax.plot(mean_mres_df.index, mean_mres_df.dis_medium,label="DIS medium")
ax.plot(mean_mres_df.index, mean_mres_df.dis_fast,label="DIS fast")
ax.plot(mean_mres_df.index, mean_mres_df.dis_superfast,label="DIS ultrafast")
ax.axhline(1,c='k',label='Euler reference')
ax.set_ylim(0,1.4)
ax.set_xlabel("track number")
ax.set_ylabel("MAE relative to Euler")
ax.set_title("DIS Flow")
ax.legend()
For most cases, DIS flow has the smallest MAE values of all tested methods. For some cases, however, the error is larger than for the Eulerian reference. There could be a number of reasons for this, which we examine below.
dis_sf_mres = mean_mres_df.dis_superfast.copy()
dis_f_mres = mean_mres_df.dis_fast.copy()
dis_m_mres = mean_mres_df.dis_medium.copy()
dis_sf_mres[dis_sf_mres.idxmax()] = np.nan # outlier
dis_f_mres[dis_f_mres.idxmax()] = np.nan # outlier
dis_m_mres[dis_m_mres.idxmax()] = np.nan # outlier
md_dis_sf = dis_sf_mres - dis_sf_mres.mean()
md_dis_f = dis_f_mres - dis_f_mres.mean()
md_dis_m = dis_m_mres - dis_m_mres.mean()
lst = 'none'
fig,ax = plt.subplots(1,1,figsize=(16,7.5))
ax.plot(md_dis_sf,alpha=0.6,label='DIS, ultrafast',marker='o',linestyle=lst,color='blue')
ax.plot(md_dis_f,alpha=0.6,label='DIS, fast',marker='D',linestyle=lst,color='red')
ax.plot(md_dis_m,alpha=0.6,label='DIS, medium',marker='s',linestyle=lst,color='green')
ax.set_ylim(-1,1)
ax.axhline(0,color='k',linestyle='dashed')
ax.set_title("DIS-MAE relativ zu Euler, Abweichung vom Mittelwert")
ax.legend()
The DIS approach with the "fast" parameter set mostly has the smallest errors, and the ultrafast variant mostly the largest.
fig,ax = plt.subplots(1,1,figsize=(16,7.5))
ax.plot(mean_mres_df.index, mean_mres_df.farnebaeck,label="Farnebäck")
ax.plot(mean_mres_df.index, mean_mres_df.farnebaeck_opt,label="Farnebäck, optimised")
ax.axhline(1,c='k',label='Euler reference')
ax.set_ylim(0,1.5)
ax.set_xlabel("track number")
ax.set_ylabel("MAE relative to Euler")
ax.set_title("Farnebäck approach")
ax.legend()
fbc_mres = mean_mres_df.farnebaeck.copy()
fbco_mres = mean_mres_df.farnebaeck_opt.copy()
fbc_mres[fbc_mres.idxmax()] = np.nan # outlier
fbco_mres[fbco_mres.idxmax()] = np.nan # outlier
mean_deviation = fbc_mres - fbc_mres.mean()
mean_deviation_opt = fbco_mres - fbco_mres.mean()
fig,ax = plt.subplots(1,1,figsize=(16,7.5))
ax.plot(mean_deviation,alpha=0.8,label='Farnebäck',marker='o',linestyle=lst,color='blue')
ax.plot(mean_deviation_opt,alpha=0.5,label='Farnebäck optimised',marker='D',linestyle=lst,color='red')
ax.set_ylim(-1,1)
ax.axhline(0,color='k',linestyle='dashed')
ax.set_title("Farnebäck-MAE relativ zu Euler, Abweichung vom Mittelwert")
ax.legend()
The MAE values of the Farnebäck method are in the same range as those of the DIS method, though mostly somewhat higher. The methods appear to be similar, because the same cases tend to have low or high MAE values. The "optimised" variant is usually quite similar to the default-parameter variant. In one case it seems to perform much worse, but otherwise its error is mostly a little lower.
fig,ax = plt.subplots(1,1,figsize=(16,7.5))
ax.plot(mean_mres_df.index, mean_mres_df.tvl1,label="Dual TV-L1")
ax.plot(mean_mres_df.index, mean_mres_df.tvl1_opt,label="Dual TV-L1, optimised")
ax.axhline(1,c='k',label='Euler reference')
ax.set_ylim(0,1.5)
ax.set_xlabel("track number")
ax.set_ylabel("MAE relative to Euler")
ax.set_title("Dual TV-L1")
ax.legend()
tvl1_mres = mean_mres_df.tvl1.copy()
tvl1_opt_mres = mean_mres_df.tvl1_opt.copy()
tvl1_mres[tvl1_mres.idxmax()] = np.nan # outlier
tvl1_opt_mres[tvl1_opt_mres.idxmax()] = np.nan # outlier
md_tvl1 = tvl1_mres - tvl1_mres.mean()
md_tvl1_opt = tvl1_opt_mres - tvl1_opt_mres.mean()
fig,ax = plt.subplots(1,1,figsize=(16,7.5))
ax.plot(md_tvl1,alpha=0.6,label='TV-L1',marker='o',linestyle=lst,color='blue')
ax.plot(md_tvl1_opt,alpha=0.6,label='TV-L1, optimised',marker='D',linestyle=lst,color='red')
ax.set_ylim(-1,1)
ax.axhline(0,color='k',linestyle='dashed')
ax.set_title("TV-L1-MAE relativ zu Euler, Abweichung vom Mittelwert")
ax.legend()
fig,ax = plt.subplots(1,1,figsize=(16,7.5))
ax.plot(mean_mres_df.index, mean_mres_df.xcorr,label="cross-correlation")
ax.axhline(1,c='k',label='Euler reference')
ax.set_ylim(0,1.5)
ax.set_xlabel("track number")
ax.set_ylabel("MAE relative to Euler")
ax.set_title("Cross-correlation")
ax.legend()
min_mae_method = []
max_mae_method = []
min_mae_value = []
max_mae_value = []
for i, r in mean_maes_df.iterrows():
min_mae_method.append(r.idxmin())
max_mae_method.append(r.idxmax())
min_mae_value.append(r.min())
max_mae_value.append(r.max())
min_mre_method = []
max_mre_method = []
min_mre_value = []
max_mre_value = []
for i, r in mean_mres_df.iterrows():
min_mre_method.append(r.idxmin())
max_mre_method.append(r.idxmax())
min_mre_value.append(r.min())
max_mre_value.append(r.max())
mae_evaluation_df = pd.DataFrame({'best_method':min_mae_method,
'worst_method':max_mae_method,
'best_method_value':min_mae_value,
'worst_method_value':max_mae_value})
mre_evaluation_df = pd.DataFrame({'best_method':min_mre_method,
'worst_method':max_mre_method,
'best_method_value':min_mre_value,
'worst_method_value':max_mre_value})
mae_evaluation_df.best_method.value_counts()
mae_evaluation_df.worst_method.value_counts()
mae_evaluation_df.groupby('best_method').median()
mae_evaluation_df.groupby('worst_method').median()
mre_evaluation_df.best_method.value_counts()
best_count_sum = mae_evaluation_df.best_method.value_counts().sum()
print(best_count_sum)
best_count_relative = {idx:[] for idx in mre_evaluation_df.best_method.value_counts().index}
for idx in mre_evaluation_df.best_method.value_counts().index:
v = mre_evaluation_df.best_method.value_counts()[idx]
best_count_relative[idx] = (v / best_count_sum) * 100
best_count_relative
worst_count_sum = mae_evaluation_df.worst_method.value_counts().sum()
print(worst_count_sum)
worst_count_relative = {idx:[] for idx in mre_evaluation_df.worst_method.value_counts().index}
for idx in mre_evaluation_df.worst_method.value_counts().index:
v = mre_evaluation_df.worst_method.value_counts()[idx]
    worst_count_relative[idx] = (v / worst_count_sum) * 100
worst_count_relative
rel_val = ((mae_evaluation_df.best_method.value_counts()['dis_fast'] + mae_evaluation_df.best_method.value_counts()['dis_medium']) / mae_evaluation_df.best_method.value_counts().sum()) * 100
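rel_val is a scalar percentage, so it can be reported directly; a small formatting sketch:
print("DIS fast and medium are the best methods in {:.1f}% of the cases".format(rel_val))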
Here the picture looks somewhat more complicated. For most cases the methods work better than the Eulerian reference, but there are also cases where that is not so. One case stands out in particular: case 20130620_2634, where data seem to be missing. The methods with the smallest MAE value for most cases are the fast and the medium variant of the DIS method; they are the best methods in …% of the cases. For the remaining cases, the TV-L1 method (…%), the Farnebäck method (…%), and the ultrafast DIS variant (…%) do best. The methods with the highest MAE values are the "optimised" TV-L1 variant in …% of the cases, the cross-correlation (…%), the Eulerian reference (…%), and the simple method (…%).
mean_maes_df.to_csv("/vols/satellite/home/lenk/proj/2019-01_trackingstudie/tracking_experiments/MAE_oflow.csv",
                    float_format = "%.3f")
mean_mres_df.to_csv("/vols/satellite/home/lenk/proj/2019-01_trackingstudie/tracking_experiments/relative_MAE_oflow.csv",
                    float_format = "%.3f")
mean_maes_df = pd.read_csv("/vols/satellite/home/lenk/proj/2019-01_trackingstudie/tracking_experiments/MAE_oflow.csv")
mean_mres_df = pd.read_csv("/vols/satellite/home/lenk/proj/2019-01_trackingstudie/tracking_experiments/relative_MAE_oflow.csv")
So it looks as if the fast and medium DIS variants could become the standard method. However, we should look at what the cases where they fail look like.
First, we examine the case where all tracking methods had problems.
case_data = haci_objects[(haci_objects.date_str == "20130609") & (haci_objects.id==699)]
start_time = pd.Timestamp("{}T{}".format(case_data.date_str.values[0],case_data.time.values[0]))
print(start_time)
tlist = pd.date_range(start = start_time - pd.Timedelta("30min"),
end = start_time + pd.Timedelta("30min"),
freq = "5min")
sat_data = {ch:[] for ch in ['IR_108','CTTH_HEIGHT']}
for i,t in enumerate(tlist):
sat_obs = lsd.load_satellite_data(t,'IR_108')
sat_data['CTTH_HEIGHT'].append(mst.get_nwcsaf_prod('CTTH_HEIGHT',t,calibrate=True))
px_corr = go.parallax_correct_msg(sat_obs,slon,slat,sat_data['CTTH_HEIGHT'][i],'std','eu','rss',3)
sat_data['IR_108'].append(px_corr)
tracking_base = []
for sd in sat_data['IR_108']:
data_cutout = cutout_box(sd,case_data.l0_msg_eu.values[0], case_data.c0_msg_eu.values[0], 51)
tracking_base.append(data_cutout)
fig,ax = plt.subplots(4,4,figsize=(16,16),sharex=True,sharey=True)
axs=ax.ravel()
for i, tb in enumerate(tracking_base):
tb_plot = axs[i].imshow(tb,vmin=210,vmax=300,cmap=emap)
axs[i].set_title("t = {} min".format(i*5 - 30))
fc.colourbar(tb_plot)
fig.delaxes(ax[3,1])
fig.delaxes(ax[3,2])
fig.delaxes(ax[3,3])
In this case, the data of one time step are indeed missing, so we should probably exclude this case.
Apart from the outlier with the missing satellite data, there are also some cases with relatively high error values.
track_ids = []
for i,case in haci_objects_2013.iterrows():
tid = "{}_{}".format(case.date.strftime("%Y%m%d"), case.id)
track_ids.append(tid)
len(track_ids)
len(mean_mres_df.index)
Let us look at the deviation from the mean and the standard deviation. Cases whose values deviate by more than two or three standard deviations deserve a closer look.
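A tiny generic sketch of this criterion (a hypothetical helper; the cells below do the same inline for the fast DIS variant):
def flag_outlier_cases(series, n_sigma=2):
    # return the entries whose deviation from the mean exceeds n_sigma standard deviations
    deviation = series - series.mean()
    return series[deviation > n_sigma * series.std()]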
mean_dev = mean_mres_df.dis_fast - mean_mres_df.dis_fast.mean()
mean_mre = mean_mres_df.dis_fast.mean()
sd_mre = mean_mres_df.dis_fast.std()
mean_dev.head()
fig,ax = plt.subplots(1,1,figsize=(16,7.5))
ax.plot(mean_dev,label='DIS, fast',marker='D',linestyle=lst,color='k')
ax.set_ylim(-1,1)
ax.axhline(0,color='b')
ax.set_title("DIS-MAE relativ zu Euler, Abweichung vom Mittelwert")
ax.legend()
ax.axhline(sd_mre,c='g')
ax.axhline(-sd_mre,c='g')
ax.axhline(2*sd_mre,c='orange')
ax.axhline(-2*sd_mre,c='orange')
ax.axhline(3*sd_mre,c='r')
ax.axhline(-3*sd_mre,c='r')
There are indeed some cases where the deviations from the mean exceed two or three standard deviations.
ueber_2sd = mean_dev[mean_dev > 2 * sd_mre]
ueber_2sd
ueber_3sd = mean_dev[mean_dev > 3 * sd_mre]
ueber_3sd
First, let us look at the cases with a deviation of more than 3σ.
ueber_3sd.index
track_ids_ueber_3sd =[]
for i in ueber_3sd.index:
track_ids_ueber_3sd.append(track_ids[i])
track_ids_ueber_3sd
track_data = {}
for tid in track_ids_ueber_3sd:
case_data = haci_objects[(haci_objects.date_str == "{}".format(tid.split("_")[0])) &
(haci_objects.id==int(tid.split("_")[1]))]
start_time = pd.Timestamp("{}T{}".format(case_data.date_str.values[0],case_data.time.values[0]))
tlist = pd.date_range(start = start_time - pd.Timedelta("30min"),
end = start_time + pd.Timedelta("30min"),
freq = "5min")
sat_data = {ch:[] for ch in ['IR_108','CTTH_HEIGHT']}
for i,t in enumerate(tlist):
sat_obs = lsd.load_satellite_data(t,'IR_108')
sat_data['CTTH_HEIGHT'].append(mst.get_nwcsaf_prod('CTTH_HEIGHT',t,calibrate=True))
px_corr = go.parallax_correct_msg(sat_obs,slon,slat,sat_data['CTTH_HEIGHT'][i],'std','eu','rss',3)
sat_data['IR_108'].append(px_corr)
track_cutout = []
for sd in sat_data['IR_108']:
data_cutout = cutout_box(sd,case_data.l0_msg_eu.values[0], case_data.c0_msg_eu.values[0], 51)
track_cutout.append(data_cutout)
track_data[tid] = track_cutout
for tid in track_data.keys():
fig,ax = plt.subplots(4,4,figsize=(16,16),sharex=True,sharey=True)
axs=ax.ravel()
for i, tb in enumerate(track_data[tid]):
tb_plot = axs[i].imshow(tb,vmin=210,vmax=300,cmap=emap)
axs[i].set_title("t = {} min".format(i*5 - 30))
fc.colourbar(tb_plot)
fig.delaxes(ax[3,1])
fig.delaxes(ax[3,2])
fig.delaxes(ax[3,3])
plt.suptitle("{}".format(tid))
It looks as if a handful of these cases cause the problems.
To substantiate this, we also look at the cases with a deviation of more than 2σ.
track_ids_ueber_2sd =[]
for i in ueber_2sd.index:
track_ids_ueber_2sd.append(track_ids[i])
track_data = {}
for tid in track_ids_ueber_2sd:
case_data = haci_objects[(haci_objects.date_str == "{}".format(tid.split("_")[0])) &
(haci_objects.id==int(tid.split("_")[1]))]
start_time = pd.Timestamp("{}T{}".format(case_data.date_str.values[0],case_data.time.values[0]))
tlist = pd.date_range(start = start_time - pd.Timedelta("30min"),
end = start_time + pd.Timedelta("30min"),
freq = "5min")
sat_data = {ch:[] for ch in ['IR_108','CTTH_HEIGHT']}
for i,t in enumerate(tlist):
sat_obs = lsd.load_satellite_data(t,'IR_108')
sat_data['CTTH_HEIGHT'].append(mst.get_nwcsaf_prod('CTTH_HEIGHT',t,calibrate=True))
px_corr = go.parallax_correct_msg(sat_obs,slon,slat,sat_data['CTTH_HEIGHT'][i],'std','eu','rss',3)
sat_data['IR_108'].append(px_corr)
track_cutout = []
for sd in sat_data['IR_108']:
data_cutout = cutout_box(sd,case_data.l0_msg_eu.values[0], case_data.c0_msg_eu.values[0], 51)
track_cutout.append(data_cutout)
track_data[tid] = track_cutout
for tid in track_data.keys():
fig,ax = plt.subplots(4,4,figsize=(16,16),sharex=True,sharey=True)
axs=ax.ravel()
for i, tb in enumerate(track_data[tid]):
tb_plot = axs[i].imshow(tb,vmin=210,vmax=300,cmap=emap)
axs[i].set_title("t = {} min".format(i*5 - 30))
fc.colourbar(tb_plot)
fig.delaxes(ax[3,1])
fig.delaxes(ax[3,2])
fig.delaxes(ax[3,3])
plt.suptitle("{}".format(tid))
What was written above also applies here: cases with rather stationary clouds and very large uniform areas are hard to track with automatic methods. For most cases, however, it worked quite well.